Last modification: 20 May, 2020
@EricJCGalvez
The data re-analized in this section has been previoulsy published in :
De Filippis, F., Pasolli, E., Tett, A., Tarallo, S., Naccarati, A., De Angelis, M., Neviani, E., Cocolin, L., Gobbetti, M., Segata, N., et al. (2019). Distinct Genetic and Functional Traits of Human Intestinal Prevotella copri Strains Are Associated with Different Habitual Diets. Cell Host Microbe 25, 444-453.e3.
## Loading required package: sp
## Checking rgeos availability: TRUE
## Loading required package: ggplot2
## Registered S3 methods overwritten by 'ggalt':
## method from
## grid.draw.absoluteGrob ggplot2
## grobHeight.absoluteGrob ggplot2
## grobWidth.absoluteGrob ggplot2
## grobX.absoluteGrob ggplot2
## grobY.absoluteGrob ggplot2
## Loading required package: viridisLite
## here() starts at /Users/ega13/MBP001/galvez_et_al_2020:/pMAGs_PULs_in_DeFilippis_et_al_2019_fig5/diversity_analysis
MAGs_table <- as_tibble(fread("../data/median_coverage_genomes.tsv", sep = "\t", header = T))
MAGs_table$V1 <- gsub(x = MAGs_table$V1, pattern = "-interlev", replacement = "")
MAGs_tax <- as_tibble(fread("../data/taxonomy.tsv", sep = "\t", header = T))
mapp_file <- as_tibble(fread("../data/Filippis_et_al_data_metadata.tsv"))
tree_data <- phyloseq::phy_tree(phyloseq::import_qiime(treefilename = "../data/tree.nwk")) ## Processing phylogenetic tree...
## ../data/tree.nwk ...
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## # A tibble: 4 x 2
## Location Freq
## <chr> <int>
## 1 Bari 16
## 2 Bologna 5
## 3 Parma 18
## 4 Torino 62
## # A tibble: 3 x 2
## Diet Freq
## <chr> <int>
## 1 Omnivore 25
## 2 Vegan 37
## 3 Vegetarian 39
## modified from Bob Rudis
## https://stat.ethz.ch/pipermail/r-help/2016-June/439123.html
# get italy region map
italy_map <- map_data("italy")
# your data will need to have these region names
print(unique(italy_map$region))## [1] "Bolzano-Bozen" "Belluno" "Udine" "Sondrio"
## [5] "Trento" "Novara" "Pordenone" "Brescia"
## [9] "Como" "Varese" "Treviso" "Bergamo"
## [13] "Gorizia" "Vicenza" "Aosta" "Vercelli"
## [17] "Venezia" "Verona" "Trieste" "Milano"
## [21] "Padova" "Torino" "Cremona" "Mantova"
## [25] "Pavia" "Alessandria" "Rovigo" "Piacenza"
## [29] "Asti" "Parma" "Reggio Emilia" "Ferrara"
## [33] "Modena" "Cuneo" "Bologna" "Genova"
## [37] "Ravenna" "Savona" "Massa-Carrara" "La Spezia"
## [41] "Forli'" "Lucca" "Firenze" "Pistoia"
## [45] "Imperia" "Pesaro e Urbino" "Arezzo" "Pisa"
## [49] "Ancona" "Livorno" "Perugia" "Siena"
## [53] "Macerata" "Ascoli Piceno" "Grosseto" "Terni"
## [57] "Teramo" "Viterbo" "Rieti" "L'Aquila"
## [61] "Pescara" "Chieti" "Roma" "Campobasso"
## [65] "Foggia" "Frosinone" "Isernia" "Latina"
## [69] "Caserta" "Benevento" "Bari" "Avellino"
## [73] "Sassari" "Potenza" "Napoli" "Brindisi"
## [77] "Nuoro" "Salerno" "Matera" "Taranto"
## [81] "Lecce" "Oristano" "Cosenza" "Cagliari"
## [85] "Catanzaro" "Messina" "Palermo" "Reggio Calabria"
## [89] "Trapani" "Catania" "Enna" "Caltanissetta"
## [93] "Agrigento" "Siracusa" "Ragusa"
# we'll simulate some data for this
set.seed(1492)
#choro_dat <- data_frame(region=unique(italy_map$region),
#value=sample(100, length(region)))
select_reg <- data.frame(region=c("Bari", "Bologna", "Parma", "Torino"),
number = c(16, 5, 18, 62) )
gg <- ggplot()
# lay down the base layer
gg <- gg + geom_map(data=italy_map, map=italy_map,
aes(long, lat, map_id=region),
color="#b2b2b2", size=0.1, fill="#252525")## Warning: Ignoring unknown aesthetics: x, y
# fill in the regions with the data
gg <- gg + geom_map(data=select_reg, map=italy_map,
aes(fill=as.character(number), map_id=region),
color="#b2b2b2", size=0.1)
# great color palette (use a better legend title)
gg <- gg + scale_fill_gdocs("Donors #")
# decent map projection for italy choropleth
#gg <- gg + coord_proj(italy_proj)
# good base theme for most maps
gg <- gg + theme_map()
# move the legend
gg <- gg #+ theme(legend.position=c(0.95, 0.3))
gg mags_matrix <- t(data.matrix(MAGs_table[,2:ncol(MAGs_table)]))
colnames(mags_matrix) <- MAGs_table$V1
otumags <- otu_table(mags_matrix, taxa_are_rows = T)
tax_matrix <- as.matrix(MAGs_tax[,3:ncol(MAGs_tax)])
row.names(tax_matrix) <- MAGs_tax$`# bin`
taxmags <- tax_table(tax_matrix)
samplemags <- sample_data(mapp_file)
row.names(samplemags) <- mapp_file$SampleID## You set `rngseed` to FALSE. Make sure you've set & recorded
## the random seed of your session for reproducibility.
## See `?set.seed`
## ...
## 378OTUs were removed because they are no longer
## present in any sample after random subsampling
## ...
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
## NMDS
MIKIbiomeR::beta_ordination(phylo_R, ordination_type = "NMDS",
color = "Diet", tagtype = NULL,
formula = c("Diet + Location + SRA_Study"))## Warning: Ignoring unknown parameters: stat
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1861141
## Run 1 stress 0.1874194
## Run 2 stress 0.1905015
## Run 3 stress 0.186121
## ... Procrustes: rmse 0.0008622721 max resid 0.008051025
## ... Similar to previous best
## Run 4 stress 0.2296703
## Run 5 stress 0.1907465
## Run 6 stress 0.1893578
## Run 7 stress 0.1861221
## ... Procrustes: rmse 0.001215341 max resid 0.01138018
## Run 8 stress 0.1907561
## Run 9 stress 0.1861139
## ... New best solution
## ... Procrustes: rmse 6.843207e-05 max resid 0.0003773191
## ... Similar to previous best
## Run 10 stress 0.1907561
## Run 11 stress 0.1861138
## ... New best solution
## ... Procrustes: rmse 3.961961e-05 max resid 0.0003182569
## ... Similar to previous best
## Run 12 stress 0.1893224
## Run 13 stress 0.1861138
## ... New best solution
## ... Procrustes: rmse 1.328881e-05 max resid 7.664712e-05
## ... Similar to previous best
## Run 14 stress 0.2012147
## Run 15 stress 0.1893223
## Run 16 stress 0.1861139
## ... Procrustes: rmse 4.762174e-05 max resid 0.000386171
## ... Similar to previous best
## Run 17 stress 0.1874194
## Run 18 stress 0.2328294
## Run 19 stress 0.1861139
## ... Procrustes: rmse 3.594775e-05 max resid 0.0002984794
## ... Similar to previous best
## Run 20 stress 0.1861202
## ... Procrustes: rmse 0.0008399812 max resid 0.007852233
## ... Similar to previous best
## *** Solution reached
## Warning: Removed 1 rows containing missing values (position_stack).
## NMDS
MIKIbiomeR::beta_ordination(phylo_R, ordination_type = "NMDS",
color = "Location",
geom_star = F,
formula = "")## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1861141
## Run 1 stress 0.1874196
## Run 2 stress 0.1893583
## Run 3 stress 0.2022387
## Run 4 stress 0.1874194
## Run 5 stress 0.1893221
## Run 6 stress 0.1861138
## ... New best solution
## ... Procrustes: rmse 5.122147e-05 max resid 0.0002759518
## ... Similar to previous best
## Run 7 stress 0.1861138
## ... New best solution
## ... Procrustes: rmse 1.401117e-05 max resid 9.493521e-05
## ... Similar to previous best
## Run 8 stress 0.2384704
## Run 9 stress 0.1907465
## Run 10 stress 0.2022387
## Run 11 stress 0.1907466
## Run 12 stress 0.239634
## Run 13 stress 0.1897001
## Run 14 stress 0.201219
## Run 15 stress 0.1907465
## Run 16 stress 0.2396903
## Run 17 stress 0.1907466
## Run 18 stress 0.1874194
## Run 19 stress 0.1874195
## Run 20 stress 0.1861222
## ... Procrustes: rmse 0.001253227 max resid 0.01173818
## *** Solution reached
## PCoA
MIKIbiomeR::beta_ordination(phylo_R, ordination_type = "PCoA",
color = "Location", shape = "Diet",
geom_star = F,
formula = "")MIKIbiomeR::abundance_plot(phylo, "SampleID", taxlev = "Order", facetby = "Diet", ntaxa = 12,
orderbar = "BF_ratio" ) +
theme(axis.text.x=element_blank())MIKIbiomeR::abundance_plot(phylo, "SampleID", taxlev = "Family", facetby = "Diet", ntaxa = 12,
orderbar = "BF_ratio" ) +
theme(axis.text.x=element_blank())MIKIbiomeR::abundance_plot(phylo_R, "SampleID", taxlev = "Phylum", facetby = "Diet", ntaxa = 12,
orderbar = "BF_ratio" ) +
theme(axis.text.x=element_blank())MIKIbiomeR::abundance_plot(phylo_R, "SampleID", taxlev = "Family", facetby = "Diet", ntaxa = 12,
orderbar = "BF_ratio" ) +
theme(axis.text.x=element_blank())phylo_prevo <- subset_taxa(physeq = phylo_R, Family == "Prevotellaceae")
mydata_rel_glom_aver.psq <- prune_taxa(names(sort(taxa_sums(phylo_prevo), T))[1:35], phylo_prevo)
mydata_rel_average.melt <- psmelt(mydata_rel_glom_aver.psq)
p <- ggplot(mydata_rel_average.melt[order(mydata_rel_average.melt$Order, decreasing = T),], aes(reorder(Sample, Abundance), y=Abundance, fill=OTU))
p <- p + geom_bar(stat="identity")
p + theme(axis.text.x = element_text(angle = 45, hjust = 0.9)) + ylab("MAGs mean coverage (%)") +
facet_wrap(~Diet, scales = "free_x", labeller = label_wrap_gen(multi_line=FALSE)) +
#scale_fill_brewer(palette = "Paired") + xlab("Donor#") +
theme(axis.text.x = element_blank()) +
ggplot2::scale_fill_manual(values = ggpubr::get_palette(palette = "simpsons",
k = 34)) +
xlab("Donor_Id") +
ggplot2::scale_y_continuous(limits = c(0, 80), expand=ggplot2::expand_scale(mult = 0.001, add =0)) +
ggplot2::scale_x_discrete(expand=c(0.05, 0)) +
# ylim(0,70) +
theme(axis.text.x = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank())## Warning: `expand_scale()` is deprecated; use `expansion()` instead.
pcopri <- c("MAG609",
"MAG610",
"MAG611",
"MAG612",
"MAG613")
pcopri_col <- c("MAG609" ="#e31a1c",
"MAG610"="#238b45",
"MAG611"="#74c476",
"MAG612"="#d9f0a3",
"MAG613"="#1f78b4")
phylo_prevo <- subset_taxa(physeq = phylo_R, phyloseq::taxa_names(phylo_R) %in% pcopri)
mydata_rel_glom_aver.psq <- prune_taxa(names(sort(taxa_sums(phylo_prevo), T))[1:12], phylo_prevo)
mydata_rel_average.melt <- psmelt(mydata_rel_glom_aver.psq)
mydata_rel_average.melt$Diet <- factor(mydata_rel_average.melt$Diet,
levels = c("Omnivore", "Vegetarian", "Vegan"))
p <- ggplot(mydata_rel_average.melt[order(mydata_rel_average.melt$Order, decreasing = T),], aes(reorder(Sample, Abundance), y=Abundance, fill=OTU))
p <- p + geom_bar(stat="identity")
pf <- p + theme(axis.text.x = element_text(angle = 45, hjust = 0.9)) + ylab("MAGs mean coverage (%)") +
facet_wrap(~Diet, scales = "free_x", ncol = 4, labeller = label_wrap_gen(multi_line=FALSE)) +
scale_fill_manual(values = pcopri_col) + xlab("Donor_Id") +
ggplot2::scale_y_continuous(limits = c(0, 80), expand=ggplot2::expand_scale(mult = 0.001, add =0)) +
ggplot2::scale_x_discrete(expand=c(0.05, 0)) +
# ylim(0,70) +
theme(axis.text.x = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank())## Warning: `expand_scale()` is deprecated; use `expansion()` instead.
##
## Attaching package: 'purrr'
## The following object is masked from 'package:maps':
##
## map
## The following object is masked from 'package:data.table':
##
## transpose
tbl_fread <-
list.files(path = "../data/predicted_PULs_in_pMAGs/", full.names = T, pattern = "*.puls.tsv") %>%
map_df(~fread(.))
puls_genes_sep <- tbl_fread %>% tidyr::separate(hmm, c("Cazy_Fam","sub_fam"), sep = "([\\_\\;])", remove = F)## Warning: Expected 2 pieces. Additional pieces discarded in 20 rows [3, 51, 112,
## 231, 408, 422, 424, 566, 736, 799, 1233, 1353, 1354, 1458, 1524, 1525, 1567,
## 1577, 1657, 1698].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1600 rows [1, 2,
## 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, ...].
## check for duplicates (forr ggenes can not be aligned with duplicate names)
# puls_genes_sep[!duplicated(puls_genes_sep[,c("pulid","sus")],fromLast=FALSE),]
## add a number to susC that is the aligning gene in this case and create a new column
df_first_sus <- puls_genes_sep[!duplicated(puls_genes_sep[,c("pulid","sus")],fromLast=FALSE),] %>%
mutate(., first_sus = ifelse(puls_genes_sep[!duplicated(puls_genes_sep[,
c("pulid","sus")], fromLast=FALSE),]$sus== "susC", "susC_1",
"none")
)
#Edit and combine Cazymes and Sus genes
puls_genes_sep$sus <- ifelse(puls_genes_sep$protein_id %in% (filter(df_first_sus, first_sus =="susC_1") %>% select(protein_id) %>% pull()), "susC_1", puls_genes_sep[["sus"]])
## combining step
puls_genes_sep$sus <- ifelse(puls_genes_sep$sus=="", puls_genes_sep[["Cazy_Fam"]],
puls_genes_sep[["sus"]])
puls_genes_sep$sus <- ifelse(puls_genes_sep$sus=="", "unk", puls_genes_sep[["sus"]])
#convering the strands to numeric -> ideally for arrows direction, WIP
puls_genes_sep$direction <- ifelse(puls_genes_sep$strand == "+", 1, -1)
puls_genes_sep$new_labels <- gsub('[0-9]+', '', puls_genes_sep$sus) %>%
gsub('_', '', .)
puls_genes_sep## genome pulid protein_id contig start end strand dist
## 1: MAG571 PUL1 MAG571_00776 gnl|HZI|MAG571_18 12184 13941 - 0
## 2: MAG571 PUL1 MAG571_00777 gnl|HZI|MAG571_18 13975 17061 - 34
## 3: MAG571 PUL2 MAG571_01512 gnl|HZI|MAG571_57 7242 8213 - -9819
## 4: MAG571 PUL2 MAG571_01513 gnl|HZI|MAG571_57 8228 8758 - 15
## 5: MAG571 PUL2 MAG571_01514 gnl|HZI|MAG571_57 9024 10748 - 266
## ---
## 1727: MAG655 PUL2 MAG655_01344 gnl|HZI|MAG655_20 21604 22305 + 31
## 1728: MAG655 PUL2 MAG655_01345 gnl|HZI|MAG655_20 22329 22865 + 24
## 1729: MAG655 PUL2 MAG655_01346 gnl|HZI|MAG655_20 23066 24622 + 201
## 1730: MAG655 PUL2 MAG655_01347 gnl|HZI|MAG655_20 24977 26401 + 355
## 1731: MAG655 PUL2 MAG655_01348 gnl|HZI|MAG655_20 26444 29755 + 43
## protein_name sus hmm Cazy_Fam sub_fam active direction
## 1: MAG571_00776 susD <NA> NA -1
## 2: MAG571_00777 susC_1 <NA> NA -1
## 3: MAG571_01512 GT2 GT2_Glycos_transf_2 GT2 Glycos NA -1
## 4: MAG571_01513 unk <NA> NA -1
## 5: MAG571_01514 susD <NA> NA -1
## ---
## 1727: MAG655_01344 unk <NA> NA 1
## 1728: MAG655_01345 unk <NA> NA 1
## 1729: MAG655_01346 PL9 PL9_2 PL9 2 NA 1
## 1730: MAG655_01347 unk <NA> NA 1
## 1731: MAG655_01348 GH78 GH78;CBM67 GH78 CBM67 NA 1
## new_labels
## 1: susD
## 2: susC
## 3: GT
## 4: unk
## 5: susD
## ---
## 1727: unk
## 1728: unk
## 1729: PL
## 1730: unk
## 1731: GH
trPUL_puls_sep <- puls_genes_sep %>%
dplyr::filter(., genome == "MAG609" & pulid == c("PUL4") |
genome == "MAG609" & pulid == c("PUL6") |
genome == "MAG610" & pulid == c("PUL10") |
genome == "MAG611" & pulid == c("PUL2") |
genome == "MAG611" & pulid == c("PUL4")
) %>%
tidyr::unite("pulid_unq", genome:pulid, sep="_", remove=F)library(gggenes)
PULs_col <- c("susC" = "#6a51a3",
"susD" = "#fd8d3c",
"GH2" = "#e0f3db",
"GH5" = "#ccebc5",
"GH10" = "#a8ddb5",
"GH13" = "#7bccc4",
"GH43" = "#4eb3d3",
"GH97" = "#2b8cbe",
"GH105" = "#0868ac",
"PL11" = "#f4a582",
"unk" = "#e0e0e0"
)
factor(trPUL_puls_sep$sus)## [1] GH43 unk GH5 susC susD susC susD unk GH10 susC susD susC
## [13] susD GH43 GH43 GH2 GH105 unk GH43 unk susD susC susD susC
## [25] GH43 GH43 susD susC susD susC unk unk unk unk GH13 GH13
## [37] GH97 susC susD susC susD unk PL11
## Levels: GH10 GH105 GH13 GH2 GH43 GH5 GH97 PL11 susC susD unk
ggplot(
trPUL_puls_sep,
aes(xmin = start, xmax = end, y = pulid_unq, fill = sus, forward = direction, label = sus)) +
geom_gene_arrow(arrow_body_height = grid::unit(4, "mm"), arrowhead_height = unit(4, "mm"),
arrowhead_width = unit(2, "mm")) +
geom_gene_label(align = "centre") +
facet_wrap(~ pulid_unq, scales = "free", ncol = 1) +
scale_fill_manual(values = PULs_col, breaks=c("susC", "susD", "GH2", "GH5", "GH10", "GH13",
"GH43", "GH97", "GH105", "PL11", "unk")) +
ylab("") +
xlab("Locus length (bp)") +
theme_genes()trPUL <- c("MAG609", "MAG610", "MAG611")
puls_data <- mydata_rel_average.melt %>%
dplyr::mutate(., PUL = ifelse(OTU %in% trPUL, "trPUL(+)", "trPUL(-)")) %>%
filter(., Abundance > 0.1) ## summarizing and getting the mean abundance by trPUL
sum_melted_abund <- puls_data %>%
dplyr::group_by(Sample, Diet, PUL) %>%
dplyr::summarise(sum_Abundance = sum(Abundance))
p <- sum_melted_abund %>%
ggplot(., aes(x = PUL, y = sum_Abundance), color = Sample) +
geom_boxplot() +
geom_jitter(aes(alpha=0.7), width = 0.25)
p <- p + theme(axis.text.x = element_text(angle = 45, hjust = 0.9)) + ylab("MAGs mean coverage (%)") +
facet_wrap(~Diet, scales = "free_x", ncol = 4, labeller = label_wrap_gen(multi_line=FALSE)) +
#scale_color_manual(values = pcopri_col) + xlab("Donor_Id") +
# ggplot2::scale_y_continuous(limits = c(0, 80), expand=ggplot2::expand_scale(mult = 0.001, add =0)) +
# ggplot2::scale_x_discrete(expand=c(0.05, 0)) +
# ylim(0,70) +
theme(panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank())
my_comp <- list(c("trPUL(+)", "trPUL(-)"))
p + ggpubr::stat_compare_means(comparisons = my_comp, method = "t.test") ###################function of summary SE
############# http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)
library("plyr")## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following object is masked from 'package:purrr':
##
## compact
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:here':
##
## here
## The following object is masked from 'package:maps':
##
## ozone
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
conf.interval=.95, .drop=TRUE) {
# New version of length which can handle NA's: if na.rm==T, don't count them
length2 <- function (x, na.rm=FALSE) {
if (na.rm) sum(!is.na(x))
else length(x)
}
# This does the summary. For each group's data frame, return a vector with
# N, mean, and sd
datac <- ddply(data, groupvars, .drop=.drop,
.fun = function(xx, col) {
c(N = length2(xx[[col]], na.rm=na.rm),
mean = mean (xx[[col]], na.rm=na.rm),
sd = sd (xx[[col]], na.rm=na.rm)
)
},
measurevar
)
# Rename the "mean" column
datac <- plyr::rename(datac, c("mean" = measurevar))
datac$se <- datac$sd / sqrt(datac$N) # Calculate standard error of the mean
# Confidence interval multiplier for standard error
# Calculate t-statistic for confidence interval:
# e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
ciMult <- qt(conf.interval/2 + .5, datac$N-1)
datac$ci <- datac$se * ciMult
return(datac)
}my_cols_pal <-c("trPUL(+)" = "#525252", "trPUL(-)" ="#f0f0f0")
pul_stats <- summarySE(data = sum_melted_abund, measurevar = "sum_Abundance",
groupvars=c("Diet", "PUL"),
na.rm=T, conf.interval=.95, .drop=TRUE)
p <- ggplot(pul_stats, aes(x=PUL, y=sum_Abundance, fill=PUL)) +
geom_col(position = position_dodge(1), colour="black") +
facet_grid(~Diet) +
ggplot2::geom_errorbar(ggplot2::aes(ymin=sum_Abundance - se,
ymax=sum_Abundance + se),
width=.3, alpha=.7, position = position_dodge(0.8)) +
scale_fill_manual(values = my_cols_pal) +
scale_color_manual(values = "black") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 20),
legend.text=element_text(size = 18),
strip.text.x =(element_text(size = 20)),
axis.text.y =(element_text(size = 20))) +
ylab("MAGs mean coverage (%)") +
xlab("") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 20),
strip.text.x =(element_text(size = 20)),
axis.text.y =(element_text(size = 20))) +
ggplot2::scale_y_continuous(limits = c(0, 25),
expand=ggplot2::expand_scale(mult = 0.001, add =0)) ## Warning: `expand_scale()` is deprecated; use `expansion()` instead.
p + theme_base() + theme(axis.text.x = element_text(angle = 45, hjust = 0.9),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank())p <- ggplot(puls_data, aes(x = Diet, y = Abundance), color = OTU)
p <- p + geom_boxplot() + geom_jitter(aes(color=OTU, alpha=0.5), width = 0.25)
pf <- p + ylab("MAGs mean coverage (%)") +
facet_wrap(~OTU, scales = "free", ncol =3, labeller = label_wrap_gen(multi_line=FALSE)) +
scale_color_manual(values = pcopri_col) +
scale_x_discrete(limit = c("Omnivore", "Vegetarian", "Vegan"),
labels = c("O", "V", "VG")) +
ggplot2::scale_y_continuous(limits = c(0, NA), expand=ggplot2::expand_scale(mult = 0.1, add =0.5)) +
# ggplot2::scale_x_discrete(expand=c(0.05, 0)) +
# ylim(0,70) +
theme(panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank())## Warning: `expand_scale()` is deprecated; use `expansion()` instead.
#my_comp <- list(c("MAG609","MAG610"), c("MAG609","MAG611"), c("MAG609","MAG612"), c("MAG609","MAG613"))
my_comp <- list(c("Omnivore","Vegetarian"), c("Vegetarian", "Vegan"), c("Omnivore","Vegan"))
pf + ggpubr::stat_compare_means(comparisons = my_comp, label = "p.signif", method = "wilcox.test") ## Warning: Computation failed in `stat_signif()`:
## not enough 'y' observations
## Warning: Computation failed in `stat_signif()`:
## not enough 'y' observations